{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c0a3eef-31e2-4928-b5b1-54efd47c8d41",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Name of the S3 bucket the test data is downloaded from; fill this in before running.\n",
    "s3_bucket = ''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0932510b-db9d-4051-8c7b-466e34879435",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Key prefix inside the bucket under which the test data is stored.\n",
    "prefix = 'pipeline'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eedbb6c3-f1dd-4e91-aff0-6a2696f53a80",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Name of the SageMaker endpoint that is invoked (and deleted in the last cell).\n",
    "endpoint_name = \"AutoGluonEndpoint\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dde4ddb4-045c-4960-8299-ec3942a3260e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sagemaker.predictor import Predictor\n",
    "\n",
    "import sagemaker"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5108f9fb-0d09-4d0c-bfd4-b97b49c7d7cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "session = sagemaker.Session()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63520c4d-812d-4f1e-889a-e72040c1de6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "!mkdir -p tmp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cf529af0-cb38-466f-bd29-fe66e35f669f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fail early with a clear message: an empty bucket name yields an invalid\n",
    "# S3 URI, and the failed download would otherwise only surface later as a\n",
    "# missing local file.\n",
    "if not s3_bucket:\n",
    "    raise ValueError(\"Set s3_bucket to the name of the bucket that contains the test data.\")\n",
    "\n",
    "test_data_path = f\"s3://{s3_bucket}/{prefix}/output/test/data.csv\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d0d4b967-d07a-40dc-93e6-8ff3979488f9",
   "metadata": {},
   "outputs": [],
   "source": [
    "!aws s3 cp {test_data_path} tmp/test_data.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0eb6e9e-4ede-40a3-817c-6af6142cc412",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# The file is read without a header row, so columns get integer labels.\n",
    "test_df = pd.read_csv(\"tmp/test_data.csv\", header=None)\n",
    "test_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1349e186-9569-467d-a0ed-fb59f0278af0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label the first column as the target; the remaining columns keep their integer labels.\n",
    "test_df = test_df.rename(\n",
    "    columns={\n",
    "        test_df.columns[0]: \"is_cancelled\"\n",
    "    }\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1bf6bb94-9606-47b4-a599-146973fd4917",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "09fc5c98-5f4a-4d07-a6a4-ef5df49ba0fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "predictor = Predictor(endpoint_name, session)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "520a526a-cba7-4235-9ff6-f50f97fdb0a7",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sagemaker.serializers import CSVSerializer\n",
    "from sagemaker.deserializers import JSONDeserializer\n",
    "\n",
    "# Requests are sent as CSV text; responses are parsed as JSON.\n",
    "predictor.serializer = CSVSerializer()\n",
    "predictor.deserializer = JSONDeserializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a32fdb4f-7b92-49bc-831a-9360ea48e3f3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "\n",
    "def get_test_payload(index, test_df=test_df):\n",
    "    \"\"\"Return the feature values of row `index` as a single CSV line.\n",
    "\n",
    "    The 'is_cancelled' column is dropped first. Non-numeric values are\n",
    "    double-quoted (csv.QUOTE_NONNUMERIC); numeric values are written bare.\n",
    "\n",
    "    Note: `test_df` defaults to the frame that existed when this cell ran;\n",
    "    re-run the cell (or pass the frame explicitly) after reassigning it.\n",
    "    \"\"\"\n",
    "    test_data = test_df.drop(['is_cancelled'], axis=1)\n",
    "    target_record = test_data.iloc[index]\n",
    "    # Series.to_csv writes one value per line. Split on line breaks only:\n",
    "    # a bare split() would also break on spaces inside quoted strings and\n",
    "    # would drop any empty line, shifting the remaining columns.\n",
    "    predictor_values = target_record.to_csv(\n",
    "        header=None,\n",
    "        index=False,\n",
    "        quotechar='\"',\n",
    "        quoting=csv.QUOTE_NONNUMERIC\n",
    "    ).splitlines()\n",
    "    return ','.join(predictor_values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f70453dd-b18b-416e-a9b8-96f2170be6b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_test_actual_result(index, test_df=test_df):\n",
    "    \"\"\"Return the 'is_cancelled' value stored in row `index` of `test_df`.\"\"\"\n",
    "    return test_df.iloc[index]['is_cancelled']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ceecd158-74c7-4513-aabf-93631160c2af",
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict(index, predictor=predictor):\n",
    "    \"\"\"Invoke the endpoint for test row `index` and return the predicted class.\n",
    "\n",
    "    Prints the raw response, then returns 0 when the first probability is\n",
    "    strictly greater than the second and 1 otherwise (ties resolve to 1).\n",
    "    \"\"\"\n",
    "    payload = get_test_payload(index)\n",
    "    prediction = predictor.predict([payload])\n",
    "    print(prediction)\n",
    "    # The unpacking requires exactly one row holding exactly two\n",
    "    # probabilities; any other response shape raises ValueError here.\n",
    "    [[prob_0, prob_1]] = prediction['probabilities']\n",
    "\n",
    "    return 0 if prob_0 > prob_1 else 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f72d33d1-cec5-450a-9fdb-c913d085ef93",
   "metadata": {},
   "outputs": [],
   "source": [
    "predict(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fd4ea5b0-5d45-4c12-8eb8-2208716aa843",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from time import sleep\n",
    "\n",
    "# Upper bound on the number of test rows sent to the endpoint.\n",
    "NUM_SAMPLES = 100\n",
    "\n",
    "actual_list = []\n",
    "predicted_list = []\n",
    "\n",
    "# Clamp to the rows actually available so a short test set does not raise IndexError.\n",
    "for i in range(min(NUM_SAMPLES, len(test_df))):\n",
    "    actual = get_test_actual_result(i)\n",
    "    predicted = predict(i)\n",
    "    print(f\"[iteration # {i}]\")\n",
    "    print(f\"actual = {actual}; predicted = {predicted}\")\n",
    "\n",
    "    actual_list.append(actual)\n",
    "    predicted_list.append(predicted)\n",
    "\n",
    "    # Short pause between requests.\n",
    "    sleep(0.05)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "763250f0-ce61-4538-b6bb-2cb2a505f344",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "\n",
    "target_names = ['not cancelled', 'cancelled']\n",
    "# Pin the labels so the two display names always line up with classes 0 and 1,\n",
    "# even when only one class occurs in the sampled rows (which would otherwise\n",
    "# make the number of target names disagree with the number of classes found).\n",
    "print(classification_report(actual_list, predicted_list, labels=[0, 1], target_names=target_names))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f0f9ef5-80c5-4644-8d8f-abd4e12819e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import accuracy_score\n",
    "\n",
    "accuracy_score(actual_list, predicted_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "67d4eaf3-dd14-4f2d-8457-d08653df724d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Clean-up: deletes the endpoint; cells that call predict() cannot be re-run afterwards.\n",
    "predictor.delete_endpoint()"
   ]
  }
 ],
 "metadata": {
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "Python 3 (Data Science)",
   "language": "python",
   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}